\documentclass[t,12pt,aspectratio=169]{beamer} % 16:9 宽屏比例，适合现代投影
\usepackage{ctex} % 中文支持
\usepackage{amsmath, amssymb} % 数学公式与符号
\usepackage{graphicx}
\usepackage{url}
\usepackage{verbatim}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 插入代码
\usepackage{listings}
\usepackage{color}

% 设置列表的样式
\definecolor{codegreen}{rgb}{0,0.6,0}
\definecolor{codegray}{rgb}{0.5,0.5,0.5}
\definecolor{codepurple}{rgb}{0.58,0,0.82}
\definecolor{backcolour}{rgb}{0.95,0.95,0.92}

\lstdefinestyle{mystyle}{
    backgroundcolor=\color{backcolour},   
    commentstyle=\color{codegreen},
    keywordstyle=\color{magenta},
    numberstyle=\tiny\color{codegray},
    stringstyle=\color{codepurple},
    basicstyle=\ttfamily\footnotesize,
    breakatwhitespace=false,         
    breaklines=true,                 
    captionpos=b,                    
    keepspaces=true,                 
    numbers=left,                    
    numbersep=5pt,                  
    showspaces=false,                
    showstringspaces=false,
    showtabs=false,                  
    tabsize=2
}

\lstset{style=mystyle}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 主题设置（推荐简洁风格）
\usetheme{Madrid}
\usecolortheme{default} % 可选：seahorse, beaver, dolphin 等

\title{R语言统计入门第4章：描述性统计和图形}
\author{PD ET AL}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}

\begin{frame}
  \titlepage
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{目录 Descriptive statistics and graphics }

\begin{enumerate}
\item[4.1.] 单组的汇总统计量 Summary statistics for a single group 
\item[4.2.] 分布的图形展示 Graphical display of distributions 
\item[4.3.] 分组数据的汇总统计量 Summary statistics by groups 
\item[4.4.] 分组数据作图 Graphics for grouped data 
\item[4.5.] 表格 Tables 
\item[4.6.] 表格的图形显示 Graphical display of tables 
\item[4.7.] 书中习题 Exercises 
\item[4.8.] 练习（选择题）
\item[4.9.] 练习（简答题）
\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{课程讲解重点难点 }

\begin{enumerate}

%\item  计算单组数据和分组数据的描述性统计量。
%\item  画出直方图、经验分布函数、QQ图、箱式图、条形图、带状图、点图、饼图。

%\item 单组数据的汇总统计量：均值，标准差，四分位数
%\item 分布的图形展示：直方图，经验累积分布函数，QQ图，箱式图
%\item 分组数据：汇总统计量 tapply 函数，作图：直方图，并联箱式图，带状图
%\item 表格：创建表格，边际表格，相对频数，图形显示

\item  单组数据的均值、标准差、方差、中位数，summary函数和五数汇总，直方图，经验分布函数，QQ图，箱式图。

\item  分组数据的汇总函数tapply函数, 分组直方图hist函数，并联箱式图boxplot函数，带状图stripchart函数。

\item  生成表格，边际表格和相对频数，表格的条形图barplot函数，点图dotchart函数，饼图pie函数。

\end{enumerate}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.1.a. 基本统计量 }%Summary statistics for a single group}

\begin{itemize}
\item  {\color{red}问题：生成50个标准正态分布的随机数。}
%\item 解答：
\begin{lstlisting}[language=R]
x <- rnorm(50)
\end{lstlisting}


\item  {\color{red}计算这50个数的均值、标准差、方差、和中位数。}
%\item 解答：
\begin{lstlisting}[language=R]
mean(x)
sd(x)
var(x)
median(x)
\end{lstlisting}


\item  {\color{red}问题：计算这50个数的最大值、最小值、和三个四分位数。}
%\item 解答：
\begin{lstlisting}[language=R]
quantile(x)
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.1.b. 计算数据框的某列数据的均值}

\begin{itemize}
\item  {\color{red}问题：取出 \,{\color{blue}\verb+juul+} 数据框，计算 \,{\color{blue}\verb+igf1+} 这一列数据的均值。}
\item 解答：使用参数设置 \,{\color{blue}\verb+na.rm=T+} 来忽略缺失数据。
\begin{lstlisting}[language=R]
attach(juul)
mean(igf1)
mean(igf1,na.rm=T)
\end{lstlisting}

\item  {\color{red}计算 \,{\color{blue}\verb+igf1+} 这一列数据中没有缺失的数据的个数。}
\item 解答：两种方法。都使用 \,{\color{blue}\verb+is.na()+} 函数来判断是否缺失数据。
\begin{lstlisting}[language=R]
length(igf1) - sum(is.na(igf1))
sum(!is.na(igf1))
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.1.c. 计算数据框的汇总统计量 }

\begin{itemize}
\item  {\color{red}计算数据 \,{\color{blue}\verb+igf1+} 和数据框 \,{\color{blue}\verb+juul+} 的汇总统计量。}
%\item 解答：
\begin{lstlisting}[language=R]
summary(igf1)
summary(juul)
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.1.d. 数据属性转换 }

\begin{itemize}

\item  {\color{red}问题：将数据 \texttt{sex}, \texttt{menarche}, 和 \texttt{tanner} 改为因子型变量。}

\item 解答：使用 \,{\color{blue}\verb+factor+} 函数来转换变量的属性。
\begin{lstlisting}[language=R]
detach(juul)
juul$sex <- factor(juul$sex,labels=c("M","F"))
juul$menarche <- factor(juul$menarche, 
+   labels=c("No","Yes"))
juul$tanner <- factor(juul$tanner, 
+   labels=c("I","II","III","IV","V"))
attach(juul)
summary(juul)
\end{lstlisting}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.1.e. 数据属性转换 }

\begin{itemize}

\item  {\color{red}问题：使用 \,{\color{blue}\verb+transform+} 函数完成同样的事情。}
%\item 解答：
\begin{lstlisting}[language=R]
juul <- transform(juul,
+   sex=factor(sex,labels=c("M","F")),
+   menarche=factor(menarche,labels=c("No","Yes")),
+   tanner=factor(tanner, 
+      labels=c("I","II","III","IV","V")))
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.2.1.a. 直方图 Histograms}

\begin{itemize}
\item  {\color{red}问题：生成 50 个标准正态分布的随机数，画出其直方图。}
%\item 解答：
\begin{lstlisting}[language=R]
x <- rnorm(50)
hist(x)
\end{lstlisting}

\item  {\color{red}问题： Altman (1991, pp. 25–26) contains an example of accident rates by age group. These are given as a count in age groups 0–4, 5–9, 10–15, 16, 17, 18–19, 20–24, 25–59, and 60–79 years of age. 画出频率直方图。}

%\item 解答：
\begin{lstlisting}[language=R]
mid.age <- c(2.5,7.5,13,16.5,17.5,19,22.5,44.5,70.5)
acc.count <- c(28,46,58,20,31,64,149,316,103)
age.acc <- rep(mid.age,acc.count)
brk <- c(0,5,10,16,17,18,20,25,60,80)
hist(age.acc,breaks=brk)
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.2.1.b. 区间长度不均的一个直方图}

 \begin{figure}
 \centering
 \includegraphics[height=0.6\textheight, width=0.7\textwidth]{altman-age-acc.png}
 \caption{Histogram with unequal divisions.}
 \end{figure}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.2.2.a. Empirical cumulative distribution function}

\begin{itemize}
\item  {\color{red}问题：解释经验分布函数的概念。}
\item  Answer: The {\color{red}empirical cumulative distribution function} is defined as the fraction of data smaller than or equal to $x$. That is, if $x$ is the $k$th smallest observation, then the proportion ${k}/{n}$ of the data is smaller than or equal to $x$. 


\item  {\color{red}生成 20 个标准正态分布的随机数，画出其经验分布函数。}
\item 解答：
\begin{lstlisting}[language=R]
x <- rnorm(20)
n <- length(x)
plot(sort(x),(1:n)/n,type='s',ylim=c(0,1))  #经验分布函数
curve(pnorm(x),add=T)  #理论分布函数
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.2.2.b. 20个标准正态分布的随机数的经验分布函数}

 \begin{figure}
 \centering
 \includegraphics[height=0.6\textheight, width=0.7\textwidth]{emp-cdf.png}
 \caption{Empirical cumulative distribution function.}
 \end{figure}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.2.2.c. Glivenko - Cantelli Theorem }

\begin{itemize}

\item  {\color{red}问题：经验分布函数收敛于理论分布函数。这是什么意义下的收敛？}

\item  解答：Let $Z_1, \dots, Z_n$ be i.i.d.\ real-valued random variables with distribution function $F(z) = \mathbb{P}(Z_i \le z)$. Denote the standard empirical distribution function by
\[ F_n(z) = \frac{1}{n} \sum_{i=1}^{n} I(Z_i\le z). \]
Then, for every $n$ and every $\varepsilon > 0$,
\[ \mathbb{P} \left( \sup_{z\in\mathbb{R}} |F(z) - F_n(z)| >\varepsilon \right)
\le 8(n+1)\exp\left( -\frac{n\varepsilon^2}{32} \right),
\]
and in particular, by the Borel--Cantelli lemma, we have
\[ \lim_{n\to \infty} \sup_{z\in\mathbb{R}} |F(z) - F_n(z)| = 0 \quad \text{almost surely}.
\]

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.2.3.a. QQ图 Q–Q plots (Quantile versus Quantile)}

\begin{itemize}
\item  {\color{red}问题：如何检验数据是否服从正态分布？}

\item 解答：一种方法是看 QQ图是否成直线。

\item  {\color{red}随机生成50个正态分布的随机数，画出与正态分布的QQ图。}
%\item 解答：
\begin{lstlisting}[language=R]
x <- rnorm(50); qqnorm(x)
\end{lstlisting}

\item  {\color{red}问题：解释QQ图的原理。}

\item 解答：One purpose of calculating the {\color{red}empirical cumulative distribution function} is to see whether data can be assumed normally distributed. For a better assessment, you might plot the $k$th smallest observation against the expected value of the $k$th smallest observation out of $n$ in a standard normal distribution.


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.2.3.b. 20个随机数的正态分布QQ图 }

 \begin{figure}
 \centering
 \includegraphics[height=0.6\textheight, width=0.7\textwidth]{qq-20-points.png}
 \caption{Q–Q plot using {\texttt{qqnorm(x)}}.}
 \end{figure}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.2.4.a. 箱式图 Boxplots: box-whisker plot}

\begin{itemize}
\item  {\color{red}问题：画出 IgM 数据的箱式图。}

%\item 解答：
\begin{lstlisting}[language=R]
par(mfrow=c(1,2))
boxplot(IgM)
boxplot(log(IgM))
par(mfrow=c(1,1))
\end{lstlisting}

\item  {\color{red}解释箱式图的画法。}

\item 解答：The box in the middle indicates ``{\color{red}hinges}'' and {\color{red}median}. The lines (``{\color{red}whiskers}'') show the largest or smallest observation that falls within a distance of 1.5 times the box size from the nearest hinge. If any observations fall farther away, the additional points are considered ``extreme'' values and are shown separately.

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.2.4.b. IgM数据和其对数数据的箱式图 }

 \begin{figure}
 \centering
 \includegraphics[height=0.6\textheight, width=0.7\textwidth]{box-igm-logigm.png}
 \caption{Boxplots for IgM and log IgM.}
 \end{figure}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.3.a. 分组数据的汇总统计量 Summary statistics by groups}

\begin{itemize}
\item  {\color{red}问题：载入\,{\color{blue}\texttt{red.cell.folate}} 数据框，按 \,{\color{blue}\texttt{ventilation}} 分组，计算每组中 \,{\color{blue}\texttt{folate}} 数据的均值、标准差、观测数。}

%\item 解答：
\begin{lstlisting}[language=R]
attach(red.cell.folate)
tapply(folate, ventilation, mean)
tapply(folate, ventilation, sd)
tapply(folate, ventilation, length)
\end{lstlisting}

\item  {\color{red}载入 \,{\color{blue}\verb+juul+} 数据框，按 \,{\color{blue}\verb+tanner+} 分组，计算每组中 \,{\color{blue}\verb+igf1+} 数据的均值。遇到缺失数据作删除处理。}

%\item 解答：
\begin{lstlisting}[language=R]
attach(juul)
tapply(igf1, tanner, mean)
tapply(igf1, tanner, mean,na.rm=T)
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.3.b. aggregate函数和 by 函数}

\begin{itemize}
\item  {\color{red}问题：按性别 \,{\color{blue}\verb+sex+} 分组，分别计算  \,{\color{blue}\verb+juul+} 数据框中的年龄 \,{\color{blue}\verb+age+}  的均值和生长因子 \,{\color{blue}\verb+igf1+} 的均值。}

\item 解答：使用 \,{\color{blue}\verb+aggregate+} 函数。注意到第一个参数是要计算均值的变量，第二个参数是用来分组的变量，第三个参数是计算均值的函数名。
\begin{lstlisting}[language=R]
aggregate(juul[c('age','igf1')],list(sex=juul$sex),
+   mean,na.rm=T)
aggregate(juul[c('age','igf1')],juul['sex'],mean,na.rm=T)
\end{lstlisting}

\item  {\color{red}按性别 \,{\color{blue}\verb+sex+} 分组，对数据框 \,{\color{blue}\verb+juul+} 的数据求汇总统计量。}

\item 解答：使用 \,{\color{blue}\verb+by+} 函数。
\begin{lstlisting}[language=R]
by(juul,juul['sex'],summary)
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.4.1.a. 分组数据的直方图 Histograms for grouped data}

\begin{itemize}
\item  {\color{red}问题：载入 \,{\color{blue}\verb+energy+} 数据，按照 \,{\color{blue}\verb+stature+} 分组，分别画出 \,{\color{blue}\verb+expend+} 数据的直方图。}

\item 解答：使用 \,{\color{blue}\verb+par()+} 函数画出两个直方图。
\begin{lstlisting}[language=R]
attach(energy)
expend.lean <- expend[stature=="lean"]
expend.obese <- expend[stature=="obese"]
par(mfrow=c(2,1))
hist(expend.lean,breaks=10,xlim=c(5,13),ylim=c(0,4), 
+   col="white")
hist(expend.obese,breaks=10,xlim=c(5,13),ylim=c(0,4),
+   col="grey")
par(mfrow=c(1,1))
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.4.1.b.  按胖瘦分组的能量消耗数据直方图 }

 \begin{figure}
 \centering
 \includegraphics[height=0.6\textheight, width=0.7\textwidth]{energy-expend-hist-stature.png}
 \caption{Histograms with refinements.}
 \end{figure}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.4.2. 并联箱式图 Parallel boxplots}

\begin{itemize}
\item  {\color{red}问题：对 \,{\color{blue}\verb+energy+} 数据按照 \,{\color{blue}\verb+stature+} 分组画出 \,{\color{blue}\verb+expend+} 的并联箱式图。}

%\item 解答：
\begin{lstlisting}[language=R]
attach(energy)
boxplot(expend ~ stature)
\end{lstlisting}

\begin{figure}
\centering
\includegraphics[height=0.4\textheight, width=0.3\textwidth]{energy-expend-box-stature.png}
\caption{Parallel boxplot.}
\end{figure}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.4.3.a. 带状图 Stripcharts}

\begin{itemize}
\item  {\color{red}问题：对 \,{\color{blue}\verb+energy+} 数据按照 \,{\color{blue}\verb+stature+} 分组画出 \,{\color{blue}\verb+expend+} 的带状图。}

%\item 解答：
\begin{lstlisting}[language=R]
opar <- par(mfrow=c(2,2), mex=0.8, mar=c(3,3,2,1)+.1)
stripchart(expend ~ stature)
stripchart(expend ~ stature, method='stack')
stripchart(expend ~ stature, method='jitter')
stripchart(expend ~ stature, method='jitter', jitter=.03)
par(opar)
\end{lstlisting}

\item  {\color{red}问题：解释参数  \,{\color{blue}\verb+mex+} 和  \,{\color{blue}\verb+mar+} 的含义。}

\item 解答：The \,{\color{blue}\verb+mex+} setting reduces the interline distance, and \,{\color{blue}\verb+mar+} reduces the number of lines that surround the plot region. 

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.4.3.b. 四个带状图}

\begin{figure}
\centering
\includegraphics[height=0.6\textheight, width=0.7\textwidth]{energy-expend-stripchart-stature.png}
\caption{Stripcharts in four variations.}
\end{figure}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.5.1.a. 生成表格 Generating tables}

\begin{itemize}

\item  {\color{red}问题： Altman (1991, p. 242) contains an example on caffeine consumption by marital status among women giving birth. 将这些数据输入表格。}

\begin{table}[ht!]
\begin{tabular}{c|cccc}
marital $\backslash$ consumption  & 0 & 1--150 & 151--300 & $>$300 \\ \hline
Married & 652 & 1537 & 598 & 242 \\ 
Prev.married & 36 & 46 & 38 & 21 \\
Single & 218 & 327 & 106 & 67 \\
\end{tabular}
\end{table}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.5.1.b. 生成表格 Generating tables}

\begin{itemize}

\item 解答：

\begin{lstlisting}[language=R]
data <- c(652,1537,598,242,36,46,38,21,218,327,106,67)
caff.marital <- matrix(data,nrow=3,byrow=T)
colnames(caff.marital) <-
+    c("0","1-150","151-300",">300")
rownames(caff.marital) <-
+    c("Married","Prev.married","Single")
names(dimnames(caff.marital)) <-
+    c("marital","consumption")
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.5.1.c. 表格呈现的汇总统计量}

\begin{itemize}
\item  {\color{red}问题：将上述表格数据转换成数据框。}

%\item 解答：
\begin{lstlisting}[language=R]
as.data.frame(as.table(caff.marital))
\end{lstlisting}

\item  {\color{red}使用 \,{\color{blue}\verb+table, xtabs, ftable+}  这三个函数，从 \,{\color{blue}\verb+juul, stroke+} 这两个数据框得出分类表格。}

%\item 解答：
\begin{lstlisting}[language=R]
table(sex)
table(sex,menarche)
table(menarche,tanner)
xtabs(~ tanner + sex, data=juul)
xtabs(~ dgn + diab + coma, data=stroke)
ftable(coma + diab ~ dgn, data=stroke)
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.5.2. Marginal tables and relative frequency}

\begin{itemize}
\item  {\color{red}问题：载入 \,{\color{blue}\verb+juul+} 数据框，按照  \,{\color{blue}\verb+tanner+} 和  \,{\color{blue}\verb+sex+} 分组，统计人数。 }

\item 解答：使用 \,{\color{blue}\verb+margin.table+}  函数得到边际表格。
\begin{lstlisting}[language=R]
tanner.sex <- table(tanner,sex)
tanner.sex
margin.table(tanner.sex,1)
margin.table(tanner.sex,2)
prop.table(tanner.sex,1)
tanner.sex/sum(tanner.sex)
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.6.1.a. 条形图 Barplots}

\begin{itemize}
\item  {\color{red}问题：画出咖啡因消耗数据的条形图。}

%\item 解答：
\begin{lstlisting}[language=R]
total.caff <- margin.table(caff.marital,2)
total.caff
barplot(total.caff,col='white')
\end{lstlisting}

\begin{figure}
\centering
\includegraphics[height=0.35\textheight, width=0.7\textwidth]{caff-consum-barplot.png}
\caption{Simple barplot of total caffeine consumption.}
\end{figure}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.6.1.b. 更多的条形图（程序） }

\begin{itemize}
\item  {\color{red}问题：矩阵作为参数，画出二维数据的条形图。}

\item 解答：使用 \,{\color{blue}\verb+t()+} 函数将表格进行转置。
\begin{lstlisting}[language=R]
par(mfrow=c(2,2))
barplot(caff.marital,col='white')
barplot(t(caff.marital),col='white')
barplot(t(caff.marital),col='white',beside=T)
barplot(prop.table(t(caff.marital),2),col='white', 
+    beside=T)
par(mfrow=c(1,1))
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.6.1.c. 更多的条形图（图形） }

\begin{figure}
\centering
\includegraphics[height=0.6\textheight, width=0.7\textwidth]{caff-consum-barplot-4.png}
\caption{Four variants of barplot on a two-way table.}
\end{figure}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.6.1.d. 使用不同颜色的条形图}

\begin{itemize}
\item  {\color{red}问题：对 \,{\color{blue}\verb+caff.marital+} 数据， 按 \,{\color{blue}\verb+marital+} 分类，作出 \,{\color{blue}\verb+caff+} 的条形图。}

%\item 解答：
\begin{lstlisting}[language=R]
barplot(prop.table(t(caff.marital),2),beside=T,
+ legend.text=colnames(caff.marital),
+ col=c("white","green","blue","red"))
\end{lstlisting}

\begin{figure}
\centering
\includegraphics[height=0.36\textheight, width=0.7\textwidth]{caff-consum-barplot-color.png}
\caption{ Bar plot with specified colours and legend.}
\end{figure}
     
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.6.2 点图 Dotcharts}

\begin{itemize}
\item  {\color{red}问题：画出 \,{\color{blue}\verb+caff.marital+} 数据的点图。}
%\item 解答：
\begin{lstlisting}[language=R]
dotchart(t(caff.marital), lcolor='blue')
\end{lstlisting}

\begin{figure}
\centering
\includegraphics[height=0.5\textheight, width=0.7\textwidth]{caff-consum-dotchart.png}
\caption{ Dotchart of caffeine consumption.}
 \end{figure}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.6.3.a. 饼图 Piecharts （程序）}

\begin{itemize}
\item  {\color{red}问题：画出 \,{\color{blue}\verb+caff.marital+} 数据的饼图。}

\item 解答：注意从矩阵中按行名选取一行的写法。
\begin{lstlisting}[language=R]
opar <- par(mfrow=c(2,2),mex=0.8,mar=c(1,1,2,1))
slices <- c('white','green','blue','red')
pie(caff.marital['Married',],main='Married',col=slices)
pie(caff.marital['Prev.married',],main='Prev.Married', 
+    col=slices)
pie(caff.marital['Single',],main='Single',col=slices)
par(opar)
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.6.3.b. 饼图 Piecharts （图形）}

\begin{figure}
\centering
\includegraphics[height=0.6\textheight, width=0.4\textwidth]{caff-consum-piechart.png}
\caption{ Pie charts of caffeine consumption according to marital status.}
\end{figure}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.7.1. Exercise.   }

\begin{itemize}

\item  Question. Explore the possibilities for different kinds of line and point plots. Vary the plot symbol, line type, line width, and colour. For example,
\begin{lstlisting}[language=R]
x <- 1:5   #1
y <- rexp(5,1)   #2
opar <- par(mfrow=c(2,2))   #3
plot(x, y, pch=15) # filled square   #4
plot(x, y, type="b", lty="dotted")   #5
plot(x, y, type="b", lwd=3)   #6
plot(x, y, type="o", col="blue")   #7
par(opar)   #8
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.7.1. Exercise.   }

\begin{itemize}

\item  下述说法中，哪句是不正确的？
\begin{enumerate}[(a)]
\item  第4行代码画了黑色圆形的点图。
\item  第5行代码画了虚线连起来的点图。
\item  第6行代码画了实线连起来的点图。
\item  第7行代码画了蓝色折线连起来的点图。
\end{enumerate}


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.7.1. Exercise - Answer.  }

\begin{itemize}

\item Answer.  (a). 第4行代码画了黑色方形的点图。


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.7.2. Exercise.   }

\begin{itemize}

\item  Question. If you make a plot like 
\begin{lstlisting}[language=R]
plot(rnorm(10), type='o') 
\end{lstlisting}
with overplotted lines and points, the lines will be visible inside the plotting symbols. How can this be avoided?

\begin{enumerate}[(a)]
\item  plot(rnorm(10), type='o', pch=21, bg='white')
\item  plot(rnorm(10), type='o', pch=21, bg='yellow')
\item  plot(rnorm(10), type='o', pch=21, bg='black')
\item  plot(rnorm(10), type='o', pch=21, bg='blue')
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.7.2. Exercise - Answer.  }

\begin{itemize}

\item Answer. (a). Use a filled symbol, and set the fill colour equal to the plot background:
\begin{lstlisting}[language=R]
plot(rnorm(10), type='o', pch=21, bg='white')
\end{lstlisting}



\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.7.3. Exercise.   }

\begin{itemize}

\item  Question. How can you overlay two \,{\color{blue}\texttt{qqnorm}} plots in the same plotting area? What goes wrong if you try to generate the plot using \,{\color{blue}\texttt{type='l'}}, and how do you avoid that?

You can use \,{\color{blue}\texttt{qqnorm}} with \,{\color{blue}\texttt{plot.it=F}} and get a return value from which you can extract the range information (you could of course also get this “by eye”).


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.7.3. Exercise.   }

%\begin{itemize}

\begin{lstlisting}[language=R]
x1 <- rnorm(20)  
x2 <- rnorm(10)+1
q1 <- qqnorm(x1, plot.it=F)
q2 <- qqnorm(x2, plot.it=F)
xr <- range(q1$x, q2$x)
yr <- range(q1$y, q2$y)
qqnorm(x1, xlim=xr, ylim=yr)
points(q2, col='red')
\end{lstlisting}

%\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.7.3. Exercise.   }

\begin{itemize}

\item  下述说法中，哪句是不正确的？
\begin{enumerate}[(a)]
\item  变量 q1 保存了第一个qq图的横坐标和纵坐标。
\item  变量 q2 保存了第二个qq图的横坐标和纵坐标。
\item  函数 range 返回参数的最小值和最大值。
\item  这两个qq图都是数据关于均匀分布的qq图。
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.7.3. Exercise - Answer.   }

\begin{itemize}

\item  Answer. (d). 这两个qq图都是数据关于正态分布的qq图。

\item  Here, \,{\color{blue}\texttt{qqnorm}} is used for the basic plot to get the labels right. Then \,{\color{blue}\texttt{points}} is used with \,{\color{blue}\texttt{q2}} for the overlay.

\item  Setting \,{\color{blue}\texttt{type='l'}} gives a messy plot because the values are not plotted in order. 
The remedy is to use \,{\color{blue}\texttt{sort(x1)}} and \,{\color{blue}\texttt{sort(x2)}}.

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.7.4. Exercise.   }

\begin{itemize}

\item  Question. Plot a histogram for the \,{\color{blue}\texttt{react}} data set. Since these data are highly discretized, the histogram will be biased. Why? You may want to try \,{\color{blue}\texttt{truehist}} from the \,{\color{blue}\texttt{MASS}} package as a replacement.

The breaks occur at integer values, as do the data. Data on the boundary are counted in the column to the left of it, effectively shifting the histogram half a unit left. The \,{\color{blue}\texttt{truehist}} function allows you to specify a better set of breaks.
\begin{lstlisting}[language=R]
hist(react)
library(MASS)
truehist(react,h=1,x0=.5)
\end{lstlisting}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.7.4. Exercise.   }

\begin{itemize}

\item  下述说法中，哪句是不正确的？
\begin{enumerate}[(a)]
\item  数据 react 是一些整数。
\item  由于 hist 函数默认将落在区间端点上的数据计入左边的区间，第一行程序得到的直方图整体向左偏移了半个单位。
\item  truehist 的 h 参数指定了直方图的区间宽度。
\item  truehist 的 x0 参数指定了直方图的区间端点为 nh-x0, 其中 n 取遍整数。
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.7.4. Exercise - Answer.  }

\begin{itemize}

\item Answer. (d). truehist 的 x0 参数指定了直方图的区间端点为 x0+nh, 其中 n 取遍整数。
\begin{lstlisting}[language=R]
?truehist
\end{lstlisting}



\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.7.5. Exercise.   }

\begin{itemize}

\item  Question. Generate a sample vector \,{\color{blue}\texttt{z}} of five random numbers from the uniform distribution, and plot \,{\color{blue}\texttt{quantile(z,x)}} as a function of \,{\color{blue}\texttt{x}} (use \,{\color{blue}\texttt{curve}}, for instance).
%Choose the statement that is incorrect. 
\begin{lstlisting}[language=R]
z <- runif(5)
curve(quantile(z,x), from=0, to=1)
\end{lstlisting}

下述说法中，哪句是不正确的？
\begin{enumerate}[(a)]
\item  变量 z 是服从标准均匀分布的五个随机数。
\item  变量 z 是分位数函数 quantile(z,x) 的自变量。
\item  quantile(z,0) 返回 z 中的最小值。
\item  quantile(z,1) 返回 z 中的最大值。
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.7.5. Exercise - Answer.  }

\begin{itemize}

\item Answer. (b).  变量 x 是分位数函数 quantile(z,x) 的自变量。

The thing to notice is the linear interpolation between data points. 


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.1. 单项选择题   }

\begin{itemize}

\item  %第1题
查看1871-1970年的尼罗河在阿斯旺的年流量数据，这是一个时间序列数据，单位是亿立方米。
下述程序计算这100个数据的10分位数。
\begin{lstlisting}[language=R]
Nile
?Nile
class(Nile)
pvec <- seq(0,1,0.1)
quantile(Nile, pvec)
\end{lstlisting}

\item  找出年流量最大的十年里，年流量的范围。
\begin{enumerate}[(a)]
\item  在1160与1370之间。
\item  在1100与1370之间。
\item  在1060与1370之间。
\item  在1000与1370之间。
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.1. 单项选择题   }

\begin{itemize}

\item  解答：(a).
直接读出 90\%分位数，即得有十年的流量大于等于1160亿立方米。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.2. 单项选择题   }

\begin{itemize}

\item  %第2题
载入课程教材的 {\color{blue}\verb+ISwR+} 数据框程序包，并研究 {\color{blue}\verb+juul+} 数据框。
\begin{lstlisting}[language=R]
library(ISwR)  #1
head(juul) #2
mydata <- juul[juul$tanner==1,]  #3
nrow(mydata)  #4
head(mydata)  #5
mydata <- mydata[,c('age','sex','igf1')]  #6
mydata <- mydata[complete.cases(mydata),]  #7
head(mydata)  #8
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.2. 单项选择题   }

\begin{itemize}

\item  下述说法中，不正确的是哪个？

\begin{enumerate}[(a)]
\item  第三行命令找出了 {\color{blue}\verb+tanner+} 变量等于1的所有观测。
\item  第四行命令计算了这个数据框有多少个观测。
\item  第六行命令删除了这个数据框里除了这三个变量之外的其它变量。
\item  第七行命令删除了这个数据框里存在缺失值的那些观测。
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.2. 单项选择题   }

\begin{itemize}

\item  解答：(a).
第三行命令的结果是tanner 变量等于1或者是缺失值的所有观测。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.3. 单项选择题   }

\begin{itemize}

\item  %第3题
继续研究 {\color{blue}\verb+juul+} 数据框。
\begin{lstlisting}[language=R]
mydata <- juul[juul$tanner==1,]  #1
mydata <- mydata[,c('age','sex','igf1')]  #2
mydata <- mydata[complete.cases(mydata),]  #3
summary(mydata)  #4
mydata$sex <- factor(mydata$sex, 
+ labels=c('male','female'))  #5
summary(mydata)  #6
hist(mydata$igf1)  #7
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.3. 单项选择题   }

\begin{itemize}

\item  下述说法中，不正确的是哪个？

\begin{enumerate}[(a)]
\item  第四行命令显示 {\color{blue}\verb+sex+} 变量是个数值型。
\item  第五行命令将 {\color{blue}\verb+sex+} 变量转换成因子型。
\item  第六行命令显示这个数据框里两种性别的人分别有多少。
\item  第七行命令画出了 {\color{blue}\verb+igf1+} 变量的直方图，结果发现取值在100-150之间的观测为最多。
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.3. 单项选择题   }

\begin{itemize}

\item  解答：(d).
默认按照间距 50 绘制直方图，结果发现取值在区间 150--200 的观测为最多。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.4. 单项选择题   }

\begin{itemize}

\item  %第4题
继续研究上一题的 {\color{blue}\verb+mydata+} 数据框。
\begin{lstlisting}[language=R]
x <- mydata$igf1   #1
n <- length(x)   #2
par(mfrow=c(2,1))   #3
plot(sort(x),(1:n)/n,type='s')   #4
title('empirical cumulative distribution function')   #5
x1 <- seq(min(x),max(x),(max(x)-min(x))/length(x))   #6
y1 <- pnorm(x1,mean=mean(x),sd=sd(x))   #7
plot(x1,y1,type='s')   #8
title('theoretical cumulative distribution function') #9
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.4. 单项选择题   }

\begin{itemize}

\item  下述说法中，不正确的是哪个？

\begin{enumerate}[(a)]
\item  第四行中的 {\color{blue}\verb+sort+} 函数是将数据从小到大排序。
\item  第四行画出了数据的经验分布函数。
\item  第六行设置一个与变量 {\color{blue}\verb+x+} 有相同取值范围和分量个数的等差数列。
\item  第八行画出了正态分布的分布函数。
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.4. 单项选择题   }

\begin{itemize}

\item  解答：(c).
这两个向量的取值范围相同，但是分量个数不一样：{\color{blue}\verb+x+} 有 $n$ 个分量，而 {\color{blue}\verb+x1+} 有 $n+1$ 个分量。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.5. 单项选择题   }

\begin{itemize}

\item  %第5题
为研究上一题的 \,{\color{blue}\verb+mydata+} 数据框中的 \,{\color{blue}\verb+igf1+} 数据的正态性，我们测试下述命令。
\begin{lstlisting}[language=R]
x <- mydata$igf1   #1
qqnorm(x)   #2
x2 <- rnorm(311)   #3
qqnorm(x2)   #4
\end{lstlisting}

\item  下述说法中，不正确的是哪个？
\begin{enumerate}[(a)]
\item  结果表明数据 {\color{blue}\verb+x2+} 更符合正态分布。
\item  在这两个QQ图里，理论分位数是放在横坐标上的。
%\item QQ图上的点投影到纵坐标上，是等间距的。
\item QQ图上的点投影到横坐标上，是等间距的。
\item  在这两个QQ图里，这些点的纵坐标就是所给的数据。
%\item  这个QQ图的工作原理是，如果两组数据都来自正态分布，那么它们的分位数的疏密程度是一样的。
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.5. 单项选择题   }

\begin{itemize}

\item  解答：(c).
QQ图上的点投影到横坐标上，是所要检验的分布的等概率的分位数。除非是均匀分布，否则这些分位数不会是等间距的。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.6. 单项选择题   }

\begin{itemize}

\item %第6题
设总体 $X$ 的一个样本是 $5, 4, 5, 4, 5, 5, 1, 2, 1, 1$. 设 $F_e(x)$ 是经验分布函数，则 $F_e(2)$ 的值是多少？

\begin{enumerate}[(a)]
\item  $0.3$.
\item  $0.4$.  
\item  $0.5$.
\item  $0.6$.  
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.6. 单项选择题   }

\begin{itemize}

\item  解答：(b).
按照经验分布函数的定义，$F_e(x)$ 的值是样本中取值小于或等于 $x$ 的频率。题目的样本中，样本容量 $n=10$, 小于或等于 2 的个体有 4 个，所以 $F_e(2)=4/10$. 

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.7. 单项选择题   }

\begin{itemize}

\item  %第7题
载入课程教材的 {\color{blue}\verb+ISwR+} 程序包，并研究其中的 {\color{blue}\verb+juul+} 数据框。

\begin{lstlisting}[language=R]
library(ISwR)   #1
head(juul)   #2
mydata <- subset(juul,tanner==1 | tanner==2)   #3
mydata <- mydata[,c(1,3,4,5)]   #4
summary(mydata)   #5
mydata <- mydata[complete.cases(mydata),]   #6
mydata$tanner <- factor(mydata$tanner,
  labels=c('i','ii'))   #7
mydata$sex <- factor(mydata$sex,labels=c('m','f'))   #8
summary(mydata)   #9
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.7. 单项选择题   }

\begin{itemize}

\item  下述说法中，不正确的是哪个？

\begin{enumerate}[(a)]
\item  第三行命令选取了变量 {\color{blue}\verb+tanner+} 取值为 1 或 2 的所有记录。
\item  第四行命令选取了该数据框的第 1,3,4,5 个变量的所有记录。
\item  第六行命令选取了该数据框的有完整观测的所有记录。
\item  最后运行结果显示 {\color{blue}\verb+tanner+} 为一期和二期的人数分别为 310 人和 71 人。
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.7. 单项选择题   }

\begin{itemize}

\item  解答：(d).
最后数据框 {\color{blue}\verb+mydata+} 中 {\color{blue}\verb+tanner+} 为一期和二期的人数分别为 311 人和 70 人。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.8. 单项选择题   }

\begin{itemize}

\item  %第8题
接着上一题的数据框 \,{\color{blue}\verb+mydata+}, 这是一个分类数据。
%为计算男孩的平均生长因子的值，下述哪个选项无法实现？
运行下述命令来练习 \,{\color{blue}\verb+tapply+} 函数和 \,{\color{blue}\verb+aggregate+} 函数。
\begin{lstlisting}[language=R]
summary(mydata)   #1
attach(mydata)   #2
tapply(igf1,sex,mean)   #3
tapply(igf1,sex,sd)   #4
tapply(igf1,tanner,mean)   #5
tapply(igf1,tanner,sd)   #6
aggregate(mydata[c('age','igf1')],
  list(sex=sex),mean)   #7
aggregate(mydata[c('age','igf1')],
  list(sex=sex,tanner=tanner),mean)   #8
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.8. 单项选择题   }

\begin{itemize}

\item  下述说法中，不正确的是哪个？

\begin{enumerate}[(a)]
\item   函数 \,{\color{blue}\verb+tapply+} 的第一个参数是目标变量，第二个参数是分类变量，第三个参数是准备分组进行的计算。
\item  结果显示，二期孩子们的生长因子的平均值是 352.67.
\item  函数 \,{\color{blue}\verb+aggregate+} 可以同时对若干个目标变量和若干个分组准则进行分组计算。
\item  结果显示，二期男孩子们的生长因子的平均值是 365.00.
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.8. 单项选择题   }

\begin{itemize}

\item  解答：(d).
结果显示，二期男孩子们的生长因子的平均值是 342.29.

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.9. 单项选择题   }

\begin{itemize}

\item %第9题
接着上一题的数据框 \,{\color{blue}\verb+mydata+}, 我们想用直方图来表示分类数据。
运行下述命令。
\begin{lstlisting}[language=R]
attach(mydata)   #1
head(mydata)   #2
igf1.boy <- igf1[sex=='m']   #3
igf1.girl <- igf1[sex=='f']   #4
par(mfrow=c(2,1))   #5
hist(igf1.boy, breaks=10)   #6
hist(igf1.girl, breaks=10, freq=F)   #7
hist(igf1, breaks=10, labels=T, ylim=c(0,120))   #8
\end{lstlisting}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.9. 单项选择题   }

\begin{itemize}

\item 下述说法中，不正确的是哪个？

\begin{enumerate}[(a)]
\item  第三行命令选取了男孩组的生长因子数据，并保存为变量 \,{\color{blue}\verb+igf1.boy+}. 
\item  第五行命令是为接下来的两个画图命令做准备，将画出上下两个子图。
\item  第七行命令画出了女孩组的生长因子数据的直方图，并且纵坐标是频率密度。
\item  第八行命令画出的直方图可以看到，最多的一组有 76 人。
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.9. 单项选择题   }

\begin{itemize}

\item  解答：(d).
最多的一组有 84 人。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.10. 单项选择题   }

\begin{itemize}

\item  %第10题
将下述表格数据保存为一个矩阵。
\begin{table}[ht!]
\centering
\caption{婚姻状况与喝咖啡的人数}\vspace{0.2cm}
\begin{tabular}{|c|c|c|c|c|} \hline
 & 不喝 & 1杯 & 2杯 & 3杯或以上 \\  \hline
已婚 & 652 & 1537 & 598 & 242 \\  \hline
离异 &   36 &     46 & 38   &   21 \\  \hline
单身 & 218 &   327 & 106 &  67 \\  \hline  
\end{tabular}
\end{table}

\item  运行下一页中的命令。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.10. 单项选择题   }

\begin{lstlisting}[language=R]
x1 <- c(652,1537,598,242)   #1
x2 <- c(36,46,38,21)   #2
x3 <- c(218,327,106,67)   #3
caf.mar <- rbind(x1,x2,x3)   #4
caf.mar   #5
colnames(caf.mar) <- c('none','1-cup','2-cup','3-cup')   #6
rownames(caf.mar) <- c('married','divorced','single')   #7
caf.mar   #8
names(dimnames(caf.mar)) <- c('mar','caf')   #9
cm.tab <- as.table(caf.mar)   #10
cm.df <- as.data.frame(cm.tab)   #11
cm.df   #12
summary(cm.df)   #13
\end{lstlisting}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.10. 单项选择题   }

\begin{itemize}

\item  下述说法中，不正确的是哪个？

\begin{enumerate}[(a)]
\item  第四行命令是将三个向量按行排成一个矩阵。
\item  第六、七行命令分别给矩阵的每一列和每一行命名。
\item  第十行命令得到一个表格。表格跟矩阵是一样的。
\item  第十一行命令得到一个数据框，其中有两个变量是因子型。
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.8.10. 单项选择题   }

\begin{itemize}

\item  解答：(c).
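表格跟矩阵是不一样的：{\color{blue}\verb+as.table+} 得到的是 table 类的对象。例如，对表格使用 {\color{blue}\verb+as.data.frame+} 会得到每行对应一个频数（Freq）的长格式数据框，而对矩阵使用时只是把矩阵的各列变成数据框的变量。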

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.9.1. 简答题   }

\begin{enumerate}

\item 生成服从连续区间 $[0,100]$ 上的均匀分布（或其它分布）的随机数40个。
\begin{enumerate}
\item 计算其均值、方差、标准差、中位数、四分位数、最小值和最大值。
\item 画出这个样本的三种直方图、箱式图和经验累积分布函数。
\item 用QQ图来检查这个样本与正态分布的差别。
\end{enumerate}

\item 载入 \verb+ISwR+ 包，载入数据框 \verb+red.cell.folate+. 使用 \verb+tapply()+ 函数，按 \verb+ventilation+ 分组计算数据 \verb+folate+ 的均值、标准差和样本容量。

\item 载入 \verb+ISwR+ 包中的数据框 \verb+energy+. 其中的数据 \verb+expend+ 按 \verb+stature+ 分组。
\begin{enumerate}
\item  作出分组数据 \verb+expend+ 的直方图。 
\item  作出分组数据 \verb+expend+ 的并联箱式图和带状图。
\end{enumerate}


\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.9.4. 简答题   }

\begin{enumerate}\setcounter{enumi}{3}

\item  使用 matrix() 函数输入一个表格，并改变行和列的名称。使用 as.table() 函数和 as.data.frame() 函数将一个表格转化成一个数据框。

\item  研究 ISwR 程序包的 juul 数据框和 stroke 数据框，解释 table() 函数、xtabs() 函数、ftable() 函数、t() 函数、margin.table() 函数、prop.table() 函数的使用方法。生成边际表格和相对频数表格。

\item  研究 ISwR 程序包的 caff.marital 数据，使用 barplot() 函数画出条形图，解释参数 col, beside, legend.text 的不同取值的含义。使用 dotchart() 函数画出点图。使用 pie() 函数画出饼图。

\item  研究不同类型的线和点图，包括图形的符号、线型、线宽、颜色等。如何避免画图的线出现在点图的符号内？


\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{4.9.8. 简答题   }

\begin{enumerate}\setcounter{enumi}{7}


\item  解释分位数的概念和 QQ 图的含义。如何把通过 qqnorm() 函数得到的两个图放在同一个图形里？


\item  研究 ISwR 程序包的 react 数据（一个数值向量），使用 hist() 函数画出直方图，这个图有什么缺陷？使用 MASS 程序包里的 truehist() 函数画出直方图。

\item  从均匀分布的总体里生成 10 个随机数，得到样本 $z$. 对于每个实数 $x\in [0,1]$, $\text{quantile}(z,x)$ 计算了样本 $z$ 的 $x$ 分位数。研究 quantile() 函数。画出函数 $x\mapsto \text{quantile}(z,x)$ 的图像。


\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[fragile]{1.20. }
%\begin{frame}{参考文献}
%
%\begin{thebibliography}{99}
%\bibitem{dalgaard-hao} Peter Dalgaard著，郝志恒等译，R语言统计入门，人民邮电出版社，2014年6月第一版。
%\bibitem{dalgaard} Peter Dalgaard. Introductory Statistics with R, Second Edition. Springer, New York, 2008.
%\bibitem{altman} D. G. Altman. Practical Statistics for Medical Research. Chapman and Hall, London, 1991. 
% 
%\end{thebibliography}
%
%\end{frame}
%
%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\end{document}
